library(mixOmics)
library(scMerge) ## for combing sce's
## load the CellBench data from GitHub
data = "https://github.com/LuyiTian/CellBench_data/blob/master/data/sincell_with_class.RData?raw=true"
load(url(data))
## create a combined sce
sce.list = c("CELseq2" = sce_sc_CELseq2_qc,
"Dropseq" = sce_sc_Dropseq_qc,
"10X" = sce_sc_10x_qc)
## merge the sce's into one, keeping common genes and the cell_line colData
sce = scMerge::sce_cbind(sce_list = sce.list, cut_off_batch = 0,
cut_off_overall = 0, method = "intersect",
batch_names = names(sce.list),
colData_names = "cell_line")
Default model
## load the wrapper function - change to your rown directory
source("../../wrapper/mixOmics_mint.R")
## check if there are duplicate cell names
sum(duplicated(colnames(sce)))
## [1] 225
## ensure there are no duplicate cell names
colnames(sce) = make.unique(colnames(sce))
## run the wrapper and get both sce and mint.splsda outputs
sce.mint = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
keepX = c(50,50), ncomp = 2, output = "both", print.log = FALSE)
## get the mint object
mint = sce.mint$mint
## plot the mint.splsda components for both batches and all cell types
plotIndiv(mint$splsda, title = NULL, subtitle = "MINT sPLSDA",
legend = TRUE, legend.title = "Cell Line", study = "global")

## plot the mint.splsda components for all cell types for individual batches
plotIndiv(mint$splsda, title = "Individual Studies", subtitle = "MINT sPLSDA",
legend = TRUE, legend.title = "Cell Line", study = "CELseq2")

## plot the mint.splsda components for all cell types, separated by batches
plotIndiv(mint$splsda, title = "All Studies",
legend = TRUE, legend.title = "Cell Line", study = "all.partial")

Tuned model
## takes ~ 90 sec
## run the wrapper and get both tuned sce and mint.splsda outputs - start with a coarse grid
sce.mint.tuned = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
ncomp = 2, tune.keepX = seq(5,65,20), output = "both")
## get the tuner outputs
tuned_mint = sce.mint.tuned$mint
## plot the error rate over the assessed number of variables
plot(tuned_mint$tune)

## output the optimum number of variables
tuned_mint$tune$choice.keepX
## comp 1 comp 2
## 45 25
refine the search grid
fine.keepX = c(seq(35,47,3), ## for comp 1
seq(15,27, 3)) ## for comp 2
## run the wrapper with and tune over the fine grid
sce.mint.fine.tuned = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
ncomp = 2, tune.keepX = fine.keepX, output = "both")
More visualisations
## clustered image map
cim(mint$splsda, comp = c(1,2), margins=c(10,5),
row.sideColors = color.mixo(as.numeric(mint$splsda$Y)), row.names = FALSE,
title = 'MINT sPLS-DA', save = "png", name.save = "heatmap")
knitr::include_graphics("heatmap.png")

## loadings for each signature genes in each study
plotLoadings(mint$splsda, contrib='max', method = 'mean', comp=2,
study='all.partial', legend=FALSE, title=NULL,
subtitle = unique(mint$splsda$study) )

## correlation circle plot
plotVar(mint$splsda, cex = 3)

## roc curves for each batch
auroc(mint$splsda, roc.comp = 1, roc.study='CELseq2')

## $Comp1
## AUC p-value
## H1975 vs Other(s) 0.6967 2.863e-08
## H2228 vs Other(s) 1.0000 0.000e+00
## HCC827 vs Other(s) 0.7745 1.105e-12
##
## $Comp2
## AUC p-value
## H1975 vs Other(s) 0.9936 0
## H2228 vs Other(s) 1.0000 0
## HCC827 vs Other(s) 0.9999 0